# Generate foot traffic related variables


# Setup -----
library(readxl)
library(tidyverse) 
library(tidycensus)





##############################################################################################
# Download & load FEMA's disaster declarations summaries dataset
# SOURCE: https://www.fema.gov/about/openfema/data-sets, "OpenFEMA Dataset: Disaster Declarations Summaries"
# Download and load data as "DisasterDeclarationsSummaries"

DisasterDeclarationsSummaries <- read.csv("~/Dropbox (Penn)/Data/FEMA DATA/DisasterDeclarationsSummaries.csv", stringsAsFactors = F)

DisasterDeclarationsSummaries = DisasterDeclarationsSummaries %>%
  mutate(
    declarationDate = as.Date(declarationDate),                                                                    # convert to date format
    fipsStateCountyCode = paste0(ifelse(nchar(fipsStateCode)==1, paste0("0", fipsStateCode), fipsStateCode),       # create FIPS state county 5-digit code
                                 
                                 ifelse(nchar(fipsCountyCode)==1, paste0("00", fipsCountyCode),
                                        ifelse(nchar(fipsCountyCode)==2, paste0("0", fipsCountyCode), fipsCountyCode)))
  )




# List of FL counties that declared individual assistance for Hurricane Michael ---------------

Michael_IA_list =  DisasterDeclarationsSummaries %>%    
  filter(ihProgramDeclared==1 | iaProgramDeclared==1) %>%
  filter(declarationTitle=="HURRICANE MICHAEL") %>%
  filter(fipsStateCode==12)





##############################################################################################
# 2017 tract-level population from ACS 5-year 
tract0 <- get_acs(geography = "tract", 
                  variables = c(population ="B01003_001"), 
                  year=2017,
                  state = "FL",
                  survey = "acs5")

tract_Michael = tract0 %>% 
  select(GEOID_tract    = GEOID, 
         population2017 = estimate) %>%
  mutate(
    GEOID_county = substr(GEOID_tract, 1, 5)
  ) %>%
  filter(GEOID_county %in% Michael_IA_list$fipsStateCountyCode) %>%
  filter(population2017>0)






#############################################################################################
# Download & load SafeGraph Monthly Patterns dataset
# SOURCE: https://www.safegraph.com/, available for academic, noncommercial use by submitting a data access request to SafeGraph
# Sample: Florida April 2018 to October 2019
# Download and load data as "FL_SG_1804_1910"

FL_SG_visitor = FL_SG_1804_1910 %>%
  mutate(
    date_range_start         = as.Date(date_range_start),  
    month                    = format(date_range_start, "%y-%m"),                                      # month of visitation records
    visitor_home_aggregation = substr(visitor_home_aggregation, 2, nchar(visitor_home_aggregation)-1)  # "visitor_home_aggregation" - The number of visitors to the business from each census tract based on the visitor's home location; e.g., {\"12069030108\":4} 
         ) %>%
  filter(region=="FL") %>%                                # only keep FL businesses
  filter(date_range_start>=as.Date("2018-04-01") &
         date_range_start<as.Date("2019-11-01")) %>%
  select(placekey, month, visitor_home_aggregation,
         brands, naics_code, latitude, longitude)



# convert "visitor_home_aggregation" (business-time structure) into (visitor tract-business-time structure)
FL_SG_visitor_v2=c()
for (i in 1:nrow(FL_SG_visitor)) {
  test_visit              = str_split(FL_SG_visitor$visitor_home_aggregation[i], ",", simplify = TRUE) %>% t()
  test_visit_new          = data.frame(visitor_home_aggregation=test_visit)
  test_visit_new$placekey = FL_SG_visitor$placekey[i]
  test_visit_new$month    = FL_SG_visitor$month[i]
  FL_SG_visitor_v2        = bind_rows(FL_SG_visitor_v2, test_visit_new)
  
  if(i %% 1000 == 0){
    cat(i)
    cat("..")
  }
}

FL_SG_visitor_v2 = FL_SG_visitor_v2 %>% 
  separate(visitor_home_aggregation, c("visitor_home_tract", "visitor_count_home_tract"), sep = '":') %>%
  mutate(
    visitor_count_home_tract = as.numeric(visitor_count_home_tract),
    visitor_home_tract       = as.numeric(substr(visitor_home_tract, 2, nchar(visitor_home_tract)))
         ) %>%
  filter(!is.na(visitor_count_home_tract)) %>% 
  filter(!is.na(visitor_home_tract))






# create a balanced panel of tract (visitor from Michael-affected areas) + business + month ------------------------

list_month        = unique((FL_SG_visitor$month))
list_FL_DIA_tract = unique(as.numeric(tract_Michael$GEOID_tract))
FL_business_list  = FL_SG_visitor %>% 
  select(placekey, brands, naics_code) %>% 
  group_by(placekey) %>% 
  filter(!duplicated(placekey))


FL_visitor_1804_1910_tract = c()
for (j in 1:length(list_month)) {                                                     
  FL_SG_visitor_v2_new_j = FL_SG_visitor_v2[FL_SG_visitor_v2$month==list_month[j],]  # select all visitation records for month j
  test_visitor_ij=c()
  for (i in 1:length(list_FL_DIA_tract)) {
    business_list_i = FL_business_list %>%                                           # For all FL businesses, identify # of visitors from tract i
      mutate(visitor_home_tract = list_FL_DIA_tract[i]) %>%
      left_join(FL_SG_visitor_v2_new_j, by=c("visitor_home_tract", "placekey"))
    
    business_list_i$visitor_count_home_tract = ifelse(is.na(business_list_i$visitor_count_home_tract), 0, business_list_i$visitor_count_home_tract)
    
    test_visitor_ij = bind_rows(test_visitor_ij, business_list_i)
    
    
    test_visitor_ij$month = FL_SG_visitor_v2_new_j$month[1]
    
    if(i %% 10 == 0){
      cat(i)
      cat("..")
    }
  }
  
  FL_visitor_1804_1910_tract = bind_rows(FL_visitor_1804_1910_tract, test_visitor_ij)
  if(j %% 1 == 0){
    cat(j)
    cat("..")
  }  
}





# Extract business location flood depth using FSF's Hurricane Michael Flood Depth Shapefile ------------------------------------------
# SOURCE: https://firststreet.org/data-access, available for purchase at FSF
# business latl/lon in "FL_business_list" table

flood_business = flood_business %>% 
  group_by(placekey) %>% 
  filter(!duplicated(placekey)) %>% 
  mutate(FloodDepth_ft_location = hist1018/30.48)

FL_visitor_1804_1910_tract = FL_visitor_1804_1910_tract %>% 
  left_join(flood_business[, c("placekey", "FloodDepth_ft_location")], by="placekey") %>%
  mutate(FloodDepth_ft_location = ifelse(is.na(FloodDepth_ft_location), 0, FloodDepth_ft_location))









# Create New Variables -------------------------------------------------------------

FL_visitor_1804_1910_tract$dummy_brands = ifelse(FL_visitor_1804_1910_tract$brands!="", 1, 0)
FL_visitor_1804_1910_tract$NAICS_2digit = substr(FL_visitor_1804_1910_tract$naics_code, 1, 2)

FL_visitor_1804_1910_tract$NAICS_2digit_name[ FL_visitor_1804_1910_tract$NAICS_2digit=="11" ] <- "Agriculture, Forestry, Fishing and Hunting"
FL_visitor_1804_1910_tract$NAICS_2digit_name[ FL_visitor_1804_1910_tract$NAICS_2digit=="21" ] <- "Mining"
FL_visitor_1804_1910_tract$NAICS_2digit_name[ FL_visitor_1804_1910_tract$NAICS_2digit=="22" ] <- "Utilities"
FL_visitor_1804_1910_tract$NAICS_2digit_name[ FL_visitor_1804_1910_tract$NAICS_2digit=="23" ] <- "Construction"
FL_visitor_1804_1910_tract$NAICS_2digit_name[ FL_visitor_1804_1910_tract$NAICS_2digit=="31" |
                                              FL_visitor_1804_1910_tract$NAICS_2digit=="32" |  
                                              FL_visitor_1804_1910_tract$NAICS_2digit=="33" ] <- "Manufacturing"
FL_visitor_1804_1910_tract$NAICS_2digit_name[ FL_visitor_1804_1910_tract$NAICS_2digit=="42" ] <- "Wholesale Trade"
FL_visitor_1804_1910_tract$NAICS_2digit_name[ FL_visitor_1804_1910_tract$NAICS_2digit=="44" |
                                              FL_visitor_1804_1910_tract$NAICS_2digit=="45" ] <- "Retail Trade"
FL_visitor_1804_1910_tract$NAICS_2digit_name[ FL_visitor_1804_1910_tract$NAICS_2digit=="48" |
                                              FL_visitor_1804_1910_tract$NAICS_2digit=="49" ] <- "Transportation and Warehousing"
FL_visitor_1804_1910_tract$NAICS_2digit_name[ FL_visitor_1804_1910_tract$NAICS_2digit=="51" ] <- "Information"
FL_visitor_1804_1910_tract$NAICS_2digit_name[ FL_visitor_1804_1910_tract$NAICS_2digit=="52" ] <- "Finance and Information"
FL_visitor_1804_1910_tract$NAICS_2digit_name[ FL_visitor_1804_1910_tract$NAICS_2digit=="53" ] <- "Real Estate Rental and Leasing"
FL_visitor_1804_1910_tract$NAICS_2digit_name[ FL_visitor_1804_1910_tract$NAICS_2digit=="54" ] <- "Professional, Scientific, and Technical Services"
FL_visitor_1804_1910_tract$NAICS_2digit_name[ FL_visitor_1804_1910_tract$NAICS_2digit=="55" ] <- "Management of Companies and Enterprises"
FL_visitor_1804_1910_tract$NAICS_2digit_name[ FL_visitor_1804_1910_tract$NAICS_2digit=="56" ] <- "Administrative and Support and Waste Management and Remediation Services"
FL_visitor_1804_1910_tract$NAICS_2digit_name[ FL_visitor_1804_1910_tract$NAICS_2digit=="61" ] <- "Educational Services"
FL_visitor_1804_1910_tract$NAICS_2digit_name[ FL_visitor_1804_1910_tract$NAICS_2digit=="62" ] <- "Health Care and Social Assistance"
FL_visitor_1804_1910_tract$NAICS_2digit_name[ FL_visitor_1804_1910_tract$NAICS_2digit=="71" ] <- "Arts, Entertainment, and Recreation"
FL_visitor_1804_1910_tract$NAICS_2digit_name[ FL_visitor_1804_1910_tract$NAICS_2digit=="72" ] <- "Accommodation and Food Services"
FL_visitor_1804_1910_tract$NAICS_2digit_name[ FL_visitor_1804_1910_tract$NAICS_2digit=="81" ] <- "Other Services (except Public Administration)"
FL_visitor_1804_1910_tract$NAICS_2digit_name[ FL_visitor_1804_1910_tract$NAICS_2digit=="92" ] <- "Public Administration"
FL_visitor_1804_1910_tract$NAICS_2digit_name[ is.na(FL_visitor_1804_1910_tract$NAICS_2digit) ] = "Missing"

FL_visitor_1804_1910_tract$visitor_home_tract_placekey = paste0(FL_visitor_1804_1910_tract$visitor_home_tract, ", ", FL_visitor_1804_1910_tract$placekey)

FL_visitor_1804_1910_tract$Post = ifelse(
  FL_visitor_1804_1910_tract$month=="18-04" |
    FL_visitor_1804_1910_tract$month=="18-05" |
    FL_visitor_1804_1910_tract$month=="18-06" |
    FL_visitor_1804_1910_tract$month=="18-07" |
    FL_visitor_1804_1910_tract$month=="18-08" |
    FL_visitor_1804_1910_tract$month=="18-09" , 0, 1)
